# coding: utf-8
# 文件名称: 糗事百科-正则.py
# 创建时间: 2021/6/15 20:24
import requests
import re
import csv
import codecs


# Module-level accumulator: every call to get_lits() appends its results here.
contents = []

# A single pattern captures the link target and the title from the SAME
# anchor, so a title can never be paired with another anchor's href.
_RECMD_LINK = re.compile(
    r'<a\sclass="recmd-content"\shref="(.+?)".*?>(.*?)</a>'
)


def get_lits(url):
    """Fetch one listing page and collect its ``recmd-content`` links.

    The page at *url* is downloaded and every
    ``<a class="recmd-content" href="...">title</a>`` anchor found in it is
    turned into a ``{'title': ..., 'url': ...}`` dict, where ``url`` is the
    anchor's href prefixed with ``https://www.qiushibaike.com``.

    Args:
        url: Address of the listing page to download.

    Returns:
        The list of dicts extracted from this page. The same dicts are also
        appended to the module-level ``contents`` list.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not answer within the timeout.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 Edg/91.0.864.48'
    }
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)

    items = [
        {
            'title': title,
            'url': 'https://www.qiushibaike.com' + href,
        }
        for href, title in _RECMD_LINK.findall(response.text)
    ]
    contents.extend(items)
    return items




def data_save(data):
    """Write *data* to ``糗事百科.csv`` in the current working directory.

    The file is overwritten, encoded as ``utf-8-sig`` and starts with a
    ``title,url`` header row. Each item of *data* is written as one row
    through ``csv.DictWriter``, so it must be a mapping keyed by ``title``
    and ``url``.
    """
    columns = ['title', 'url']
    with open('糗事百科.csv', 'w', encoding='utf-8-sig', newline='') as out:
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
        for row in data:
            writer.writerow(row)


def main():
    """Scrape listing pages 1 and 2, then save everything collected.

    ``get_lits`` is called once per page URL; the accumulated module-level
    ``contents`` list is then handed to ``data_save``.
    """
    urls = [f'https://www.qiushibaike.com/8hr/page/{i}/' for i in range(1, 3)]

    for url in urls:
        # Results are collected in the module-level ``contents`` list, so
        # nothing needs to be kept from the call itself.
        get_lits(url)

    data_save(contents)






# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()

# NOTE(review): this print sits outside the guard above, so it also runs when
# the module is merely imported, printing whatever `contents` holds then.
print(contents)